import numpy as numpy
import scipy as scipy
from sklearn import cluster
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt

def color_cluster(i):
    """Return the matplotlib color name used to draw cluster label ``i``.

    Args:
        i: Integer cluster label; ``-1`` denotes a noise sample.

    Returns:
        ``'blue'`` for noise (``-1``), the palette entry for labels in
        ``0 .. limit - 1`` and ``'cyan'`` for every other label, where
        ``limit`` is the smaller of the module-level ``NUM_CLASSES`` and
        the palette size.
    """
    # choose each class color
    colors = ('black', 'orange', 'yellow', 'magenta')
    if i == -1:
        return 'blue'  # blue for noise samples
    # Bound the lookup by the palette size as well as NUM_CLASSES: checking
    # NUM_CLASSES alone raises IndexError once NUM_CLASSES exceeds the
    # palette, and lets labels below -1 wrap around via negative indexing.
    if 0 <= i < min(NUM_CLASSES, len(colors)):
        return colors[i]
    return 'cyan'

def set2List(NumpyArray):
    """Convert an iterable of NumPy values into a plain Python list.

    Each element is converted individually through its own ``tolist()``
    method, so NumPy scalars become Python scalars and sub-arrays become
    nested lists.

    Args:
        NumpyArray: Iterable whose items expose ``tolist()`` (for example a
            NumPy array).

    Returns:
        A new list holding the converted items in iteration order.
    """
    return [element.tolist() for element in NumpyArray]

def DBSCAN(Dataset, Epsilon,MinumumPoints,DistanceMethod = 'euclidean'):
    """Cluster the rows of ``Dataset`` with a simple DBSCAN implementation.

    Args:
        Dataset: 2-D NumPy array with one sample per row.
        Epsilon: Neighborhood radius. Sample ``j`` is a neighbor of sample
            ``i`` when their distance is strictly less than ``Epsilon``.
        MinumumPoints: Minimum number of neighbors (taken from the sample's
            row of the distance matrix, which includes the sample itself)
            required for a sample to seed or extend a cluster.
        DistanceMethod: Metric name forwarded to
            ``scipy.spatial.distance.pdist``.

    Returns:
        1-D float NumPy array with one entry per sample: ``0`` for samples
        that were not assigned to any cluster (noise) and ``1, 2, ...`` for
        the clusters in the order they were discovered.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # expose ``scipy.spatial`` on SciPy versions without lazy subpackage
    # loading, so ``scipy.spatial.distance`` could raise AttributeError.
    from scipy.spatial.distance import pdist, squareform

    m = Dataset.shape[0]
    Visited = numpy.zeros(m, 'int')
    PointClusterNumber = numpy.zeros(m)
    PointClusterNumberIndex = 1
    # Easy but not scalable implementation; DistanceMatrix.shape is m x m
    DistanceMatrix = squareform(pdist(Dataset, DistanceMethod))
    for i in range(m):
        if Visited[i] != 0:
            continue
        Visited[i] = 1
        # vector of neighbors of the sample i
        Neighbors = numpy.where(DistanceMatrix[i] < Epsilon)[0]
        if len(Neighbors) < MinumumPoints:
            # Too few neighbors: leave the label at 0 (noise). The sample
            # can still be absorbed later as a border point, because
            # ExpandCluster labels any queued sample whose label is 0.
            continue
        Cluster = [i]
        PointClusterNumber[i] = PointClusterNumberIndex
        # ExpandCluster grows the neighbor list in place and writes the
        # cluster number into PointClusterNumber for every reached sample.
        ExpandCluster(set2List(Neighbors), Cluster, MinumumPoints, Epsilon,
                      Visited, DistanceMatrix, PointClusterNumber,
                      PointClusterNumberIndex)
        PointClusterNumberIndex += 1

    return PointClusterNumber
 
def ExpandCluster(PointNeighbors,Cluster,MinumumPoints,Epsilon,
                  Visited,DistanceMatrix,PointClusterNumber,
                  PointClusterNumberIndex):
    """Grow one cluster from an initial neighbor list, mutating its arguments.

    Args:
        PointNeighbors: List of sample indices to process. It is used as a
            work queue and extended in place with newly reached samples.
        Cluster: List of member indices; samples labeled here are appended.
        MinumumPoints: Minimum neighbor count for a sample to pull its own
            neighbors into the queue.
        Epsilon: Neighborhood radius (strict ``<`` comparison).
        Visited: Integer array of per-sample flags; set to 1 when a sample's
            neighborhood has been examined.
        DistanceMatrix: Square matrix of pairwise distances.
        PointClusterNumber: Per-sample label array; entries equal to 0 are
            overwritten with ``PointClusterNumberIndex``.
        PointClusterNumberIndex: Number of the cluster being grown.

    Returns:
        None. All results are written into the mutable arguments.
    """
    # Set mirror of the queue: O(1) membership tests instead of scanning the
    # list with ``list.index`` for every candidate neighbor.
    Queued = set(PointNeighbors)
    # Explicit index because the queue grows while it is being walked.
    position = 0
    while position < len(PointNeighbors):
        i = PointNeighbors[position]
        position += 1
        if Visited[i] == 0:
            Visited[i] = 1
            Neighbors = numpy.where(DistanceMatrix[i] < Epsilon)[0]
            if len(Neighbors) >= MinumumPoints:
                # tolist() yields plain Python ints, matching the ints the
                # queue was seeded with.
                for j in Neighbors.tolist():
                    if j not in Queued:
                        Queued.add(j)
                        PointNeighbors.append(j)

        # Only unlabeled samples join; samples already owned by a cluster
        # keep their label.
        if PointClusterNumber[i] == 0:
            Cluster.append(i)
            PointClusterNumber[i] = PointClusterNumberIndex
    return
 
# Demo: generate blob data and compare DBSCAN results for several
# (eps, min_samples) settings side by side.
NUM_CLASSES = 4
NUM_FEATURES = 2
X,y = make_blobs(n_samples=80, n_features=NUM_FEATURES,
                 centers=NUM_CLASSES, cluster_std=1, random_state=14)

# First panel shows the generated samples colored by their true blob; the
# remaining panels show one DBSCAN run each.
fig,axs = plt.subplots(1,NUM_CLASSES+1,figsize=(15,3), subplot_kw={'xticks':(), 'yticks':()})
axs[0].scatter(X[:,0], X[:,1], c = y)
axs[0].set_title('source samples')

# NOTE(review): these two names are not read by the sweep below; kept
# because they are module-level names.
Epsilon=1
MinumumPoints=3

for f, eps, min_samples in zip(range(NUM_CLASSES),
                               [0.5,1,1.5,2], [2,3,4,5]):
    clusters = DBSCAN(X,eps,min_samples)
    # DBSCAN returns 0 for noise and numbers clusters from 1, while
    # color_cluster expects -1 for noise and 0-based cluster indices, so
    # shift every label down by one before looking up its color.
    point_colors = [color_cluster(int(label) - 1) for label in clusters]
    # One scatter call per panel instead of one call per sample.
    axs[f+1].scatter(X[:,0], X[:,1], c = point_colors)
    axs[f+1].set_title('eps: '+ str(eps) + ', min_samp: ' +
                       str(min_samples))

plt.show()

